*******************************************************************************************************
* On the Optimal Design of Transfers and Income-Tax Progressivity
* Data: Panel Study of Income Dynamics
*******************************************************************************************************

* Session settings
set more off			// do not pause the output for --more--

* Start from an empty session: macros, Mata objects, matrices and data
macro drop _all
clear mata
clear matrix
clear

* Raise the variable limit (must be set while no data are in memory)
set maxvar 10000

********************************************************************************
* Convert Raw Data to Stata
********************************************************************************

* Root directory of the project: the directory the do-file is started from.
* All paths below are built relative to it.
global baseline_path "`c(pwd)'"

* Family files: for every wave, change into the wave's raw-data folder, run
* its FAM<year> do-file, and save the data in memory as fam<year>.dta in
* Prepared_Data.
* Forward slashes are used as directory separators: Stata accepts them on
* Windows, macOS and Unix alike, whereas "\" works on Windows only and
* suppresses macro expansion when it directly precedes ` or $.
forvalues num = 1970/1992 {
	cd "$baseline_path/Datasets/Raw_Data/fam`num'"
	do FAM`num'
	cd "$baseline_path/Datasets/Prepared_Data"
	save fam`num'.dta, replace
}

* Cross-year individual file: same procedure, saved as ind2019er.dta.
cd "$baseline_path/Datasets/Raw_Data/ind2019er"
do IND2019ER
* The working directory stays at Prepared_Data from here on; the code below
* refers to the prepared .dta files by bare file name.
cd "$baseline_path/Datasets/Prepared_Data"
save ind2019er.dta, replace

********************************************************************************
* Combine Individual and Family Files for all Years
********************************************************************************

* 1970

* --- Individual file: identifiers and 1970 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30043 ER30044 ER30045 ER30046) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind70_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1970.dta, clear
rename V1230 famid68         // 1968 family ID
rename V1102 famid           // family ID of this wave
rename V1239 age_h           // age of head
rename V1238 fam_size        // family size
rename V1109 comp_change     // family composition change
generate college_h = inlist(V1485, 7, 8)    // college indicator for head: 1 if V1485 is 7 or 8, 0 otherwise
rename V1196 labinc_h        // labor income of head
rename V1191 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V1198 labinc_s        // labor income of spouse
rename V1201 businc_brac     // bracketed business income
rename V1200 farminc_brac    // bracketed farm income
rename V1214 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc_brac farminc_brac uiinc_h
save fam70_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind70_prep.dta, clear
merge m:1 famid68 famid using fam70_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1970
save ind_fam_70.dta, replace


* 1971

* --- Individual file: identifiers and 1971 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30067 ER30068 ER30069 ER30070) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind71_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1971.dta, clear
rename V1932 famid68         // 1968 family ID
rename V1802 famid           // family ID of this wave
rename V1942 age_h           // age of head
rename V1941 fam_size        // family size
rename V1809 comp_change     // family composition change
generate college_h = inlist(V2197, 7, 8)    // college indicator for head: 1 if V2197 is 7 or 8, 0 otherwise
rename V1897 labinc_h        // labor income of head
rename V1892 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V1899 labinc_s        // labor income of spouse
rename V1902 businc_brac     // bracketed business income
rename V1901 farminc_brac    // bracketed farm income
rename V1916 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc_brac farminc_brac uiinc_h
save fam71_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind71_prep.dta, clear
merge m:1 famid68 famid using fam71_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1971
save ind_fam_71.dta, replace


* 1972

* --- Individual file: identifiers and 1972 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30091 ER30092 ER30093 ER30094) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind72_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1972.dta, clear
rename V2533 famid68         // 1968 family ID
rename V2402 famid           // family ID of this wave
rename V2542 age_h           // age of head
rename V2541 fam_size        // family size
rename V2410 comp_change     // family composition change
generate college_h = inlist(V2823, 7, 8)    // college indicator for head: 1 if V2823 is 7 or 8, 0 otherwise
rename V2498 labinc_h        // labor income of head
rename V2493 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V2500 labinc_s        // labor income of spouse
rename V2503 businc_brac     // bracketed business income
rename V2502 farminc_brac    // bracketed farm income
rename V2517 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc_brac farminc_brac uiinc_h
save fam72_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind72_prep.dta, clear
merge m:1 famid68 famid using fam72_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1972
save ind_fam_72.dta, replace


* 1973

* --- Individual file: identifiers and 1973 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30117 ER30118 ER30119 ER30120) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
* age is kept here as in every other wave; the keep list previously omitted
* it, so the 1973 individual/family file was the only early wave without age.
keep famid68 persid famid seqno rel_head age
save ind73_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1973.dta, clear
rename V3085 famid68         // 1968 family ID
rename V3002 famid           // family ID of this wave
rename V3095 age_h           // age of head
rename V3094 fam_size        // family size
rename V3010 comp_change     // family composition change
generate college_h = inlist(V3241, 7, 8)    // college indicator for head: 1 if V3241 is 7 or 8, 0 otherwise
rename V3051 labinc_h        // labor income of head
rename V3046 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V3053 labinc_s        // labor income of spouse
rename V3056 businc_brac     // bracketed business income
rename V3055 farminc_brac    // bracketed farm income
rename V3070 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc_brac farminc_brac uiinc_h
save fam73_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind73_prep.dta, clear
merge m:1 famid68 famid using fam73_prep.dta
keep if _merge == 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1973
save ind_fam_73.dta, replace


* 1974

* --- Individual file: identifiers and 1974 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30138 ER30139 ER30140 ER30141) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind74_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1974.dta, clear
rename V3497 famid68         // 1968 family ID
rename V3402 famid           // family ID of this wave
rename V3508 age_h           // age of head
rename V3507 fam_size        // family size
rename V3410 comp_change     // family composition change
generate college_h = inlist(V3663, 7, 8)    // college indicator for head: 1 if V3663 is 7 or 8, 0 otherwise
rename V3463 labinc_h        // labor income of head
rename V3458 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V3465 labinc_s        // labor income of spouse
rename V3468 businc_brac     // bracketed business income
rename V3467 farminc_brac    // bracketed farm income
rename V3482 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc_brac farminc_brac uiinc_h
save fam74_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind74_prep.dta, clear
merge m:1 famid68 famid using fam74_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1974
save ind_fam_74.dta, replace


* 1975

* --- Individual file: identifiers and 1975 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30160 ER30161 ER30162 ER30163) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind75_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1975.dta, clear
rename V3909 famid68         // 1968 family ID
rename V3802 famid           // family ID of this wave
rename V3921 age_h           // age of head
rename V3920 fam_size        // family size
rename V3810 comp_change     // family composition change
generate college_h = (V4099 == 1)    // college indicator for head: 1 if V4099 equals 1, 0 otherwise
rename V3863 labinc_h        // labor income of head
rename V3858 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V3865 labinc_s        // labor income of spouse
rename V3868 businc_brac     // bracketed business income
rename V3867 farminc_brac    // bracketed farm income
rename V3882 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc_brac farminc_brac uiinc_h
save fam75_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind75_prep.dta, clear
merge m:1 famid68 famid using fam75_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: one not matched from using)
drop _merge
generate year = 1975
save ind_fam_75.dta, replace


* 1976

* --- Individual file: identifiers and 1976 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30188 ER30189 ER30190 ER30191) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind76_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1976.dta, clear
rename V4423 famid68         // 1968 family ID
rename V4302 famid           // family ID of this wave
rename V4436 age_h           // age of head
rename V4435 fam_size        // family size
rename V4310 comp_change     // family composition change
generate college_h = (V4690 == 1)    // college indicator for head: 1 if V4690 equals 1, 0 otherwise
rename V5031 labinc_h        // labor income of head
rename V4373 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V4379 labinc_s        // labor income of spouse
rename V4382 businc          // business income
rename V4381 farminc         // farm income
rename V4397 uiinc_h         // UI, worker's compensation of head
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam76_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind76_prep.dta, clear
merge m:1 famid68 famid using fam76_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1976
save ind_fam_76.dta, replace


* 1977

* --- Individual file: identifiers and 1977 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30217 ER30218 ER30219 ER30220) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind77_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1977.dta, clear
rename V5336 famid68         // 1968 family ID
rename V5202 famid           // family ID of this wave
rename V5350 age_h           // age of head
rename V5349 fam_size        // family size
rename V5210 comp_change     // family composition change
generate college_h = (V5614 == 1)    // college indicator for head: 1 if V5614 equals 1, 0 otherwise
rename V5627 labinc_h        // labor income of head
rename V5283 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V5289 labinc_s        // labor income of spouse
rename V5292 businc          // business income
rename V5291 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V5308 V5309), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam77_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind77_prep.dta, clear
merge m:1 famid68 famid using fam77_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1977
save ind_fam_77.dta, replace


* 1978

* --- Individual file: identifiers and 1978 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30246 ER30247 ER30248 ER30249) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind78_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1978.dta, clear
rename V5835 famid68         // 1968 family ID
rename V5702 famid           // family ID of this wave
rename V5850 age_h           // age of head
rename V5849 fam_size        // family size
rename V5710 comp_change     // family composition change
generate college_h = (V6163 == 1)    // college indicator for head: 1 if V6163 equals 1, 0 otherwise
rename V6174 labinc_h        // labor income of head
rename V5782 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V5788 labinc_s        // labor income of spouse
rename V5791 businc          // business income
rename V5790 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V5808 V5809), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam78_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind78_prep.dta, clear
merge m:1 famid68 famid using fam78_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1978
save ind_fam_78.dta, replace


* 1979

* --- Individual file: identifiers and 1979 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30283 ER30284 ER30285 ER30286) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind79_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1979.dta, clear
rename V6446 famid68         // 1968 family ID
rename V6302 famid           // family ID of this wave
rename V6462 age_h           // age of head
rename V6461 fam_size        // family size
rename V6310 comp_change     // family composition change
generate college_h = (V6760 == 1)    // college indicator for head: 1 if V6760 equals 1, 0 otherwise
rename V6767 labinc_h        // labor income of head
rename V6391 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V6398 labinc_s        // labor income of spouse
rename V6401 businc          // business income
rename V6400 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V6419 V6420), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam79_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind79_prep.dta, clear
merge m:1 famid68 famid using fam79_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1979
save ind_fam_79.dta, replace


* 1980

* --- Individual file: identifiers and 1980 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30313 ER30314 ER30315 ER30316) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind80_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1980.dta, clear
rename V7050 famid68         // 1968 family ID
rename V6902 famid           // family ID of this wave
rename V7067 age_h           // age of head
rename V7066 fam_size        // family size
rename V6910 comp_change     // family composition change
generate college_h = (V7393 == 1)    // college indicator for head: 1 if V7393 equals 1, 0 otherwise
rename V7413 labinc_h        // labor income of head
rename V6981 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V6988 labinc_s        // labor income of spouse
rename V6991 businc          // business income
rename V6990 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V7009 V7010), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam80_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind80_prep.dta, clear
merge m:1 famid68 famid using fam80_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1980
save ind_fam_80.dta, replace


* 1981

* --- Individual file: identifiers and 1981 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30343 ER30344 ER30345 ER30346) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
* age is kept here as in every other wave; the keep list previously omitted
* it, so the 1981 individual/family file was saved without the age variable.
keep famid68 persid famid seqno rel_head age
save ind81_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1981.dta, clear
rename V7642 famid68         // 1968 family ID
rename V7502 famid           // family ID of this wave
rename V7658 age_h           // age of head
rename V7657 fam_size        // family size
rename V7510 comp_change     // family composition change
generate college_h = (V8045 == 1)    // college indicator for head: 1 if V8045 equals 1, 0 otherwise
rename V8066 labinc_h        // labor income of head
rename V7573 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V7580 labinc_s        // labor income of spouse
rename V7583 businc          // business income
rename V7582 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V7601 V7602), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam81_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind81_prep.dta, clear
merge m:1 famid68 famid using fam81_prep.dta
keep if _merge == 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1981
save ind_fam_81.dta, replace


* 1982

* --- Individual file: identifiers and 1982 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30373 ER30374 ER30375 ER30376) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind82_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1982.dta, clear
rename V8335 famid68         // 1968 family ID
rename V8202 famid           // family ID of this wave
rename V8352 age_h           // age of head
rename V8351 fam_size        // family size
rename V8210 comp_change     // family composition change
generate college_h = (V8669 == 1)    // college indicator for head: 1 if V8669 equals 1, 0 otherwise
rename V8690 labinc_h        // labor income of head
rename V8265 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V8273 labinc_s        // labor income of spouse
rename V8276 businc          // business income
rename V8275 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V8294 V8295), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam82_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind82_prep.dta, clear
merge m:1 famid68 famid using fam82_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1982
save ind_fam_82.dta, replace


* 1983

* --- Individual file: identifiers and 1983 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30399 ER30400 ER30401 ER30402) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind83_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1983.dta, clear
rename V8943 famid68         // 1968 family ID
rename V8802 famid           // family ID of this wave
rename V8961 age_h           // age of head
rename V8960 fam_size        // family size
rename V8810 comp_change     // family composition change
generate college_h = (V9355 == 1)    // college indicator for head: 1 if V9355 equals 1, 0 otherwise
rename V9376 labinc_h        // labor income of head
rename V8873 wageinc_h       // wage income of head (renamed, but not in the keep list below)
rename V8881 labinc_s        // labor income of spouse
rename V8884 businc          // business income
rename V8883 farminc         // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V8902 V8903), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam83_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind83_prep.dta, clear
merge m:1 famid68 famid using fam83_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1983
save ind_fam_83.dta, replace


* 1984

* --- Individual file: identifiers and 1984 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30429 ER30430 ER30431 ER30432) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind84_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1984.dta, clear
rename V10400 famid68        // 1968 family ID
rename V10002 famid          // family ID of this wave
rename V10419 age_h          // age of head
rename V10418 fam_size       // family size
rename V10010 comp_change    // family composition change
generate college_h = (V11002 == 1)    // college indicator for head: 1 if V11002 equals 1, 0 otherwise
rename V11023 labinc_h       // labor income of head
rename V10256 wageinc_h      // wage income of head (renamed, but not in the keep list below)
rename V10263 labinc_s       // labor income of spouse
rename V10266 businc         // business income
rename V10265 farminc        // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V10295 V10296), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam84_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind84_prep.dta, clear
merge m:1 famid68 famid using fam84_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1984
save ind_fam_84.dta, replace

 
* 1985

* --- Individual file: identifiers and 1985 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30463 ER30464 ER30465 ER30466) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind85_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1985.dta, clear
rename V11581 famid68        // 1968 family ID
rename V11102 famid          // family ID of this wave
rename V11606 age_h          // age of head
rename V11605 fam_size       // family size
rename V11112 comp_change    // family composition change
generate college_h = (V11960 == 1)    // college indicator for head: 1 if V11960 equals 1, 0 otherwise
rename V12372 labinc_h       // labor income of head
rename V11397 wageinc_h      // wage income of head (renamed, but not in the keep list below)
rename V11404 labinc_s       // labor income of spouse
rename V11407 businc         // business income
rename V11406 farminc        // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V11440 V11441), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam85_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind85_prep.dta, clear
merge m:1 famid68 famid using fam85_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1985
save ind_fam_85.dta, replace


* 1986

* --- Individual file: identifiers and 1986 wave variables ---
use ind2019er.dta, clear
rename (ER30001 ER30002) (famid68 persid)    // 1968 family ID and person number: jointly identify an individual across waves
rename (ER30498 ER30499 ER30500 ER30501) (famid seqno rel_head age)    // family ID of this wave, sequence number, relationship to head, age
keep famid68 persid famid seqno rel_head age
save ind86_prep.dta, replace

* --- Family file: head/spouse characteristics and income ---
use fam1986.dta, clear
rename V12988 famid68        // 1968 family ID
rename V12502 famid          // family ID of this wave
rename V13011 age_h          // age of head
rename V13010 fam_size       // family size
rename V12510 comp_change    // family composition change
generate college_h = (V13583 == 1)    // college indicator for head: 1 if V13583 equals 1, 0 otherwise
rename V13624 labinc_h       // labor income of head
rename V12796 wageinc_h      // wage income of head (renamed, but not in the keep list below)
rename V12803 labinc_s       // labor income of spouse
rename V12806 businc         // business income
rename V12805 farminc        // farm income
* UI, worker's compensation of head: row sum of the two source variables;
* the missing option leaves the total missing when both are missing
egen uiinc_h = rowtotal(V12839 V12840), missing
keep famid68 famid age_h fam_size comp_change college_h labinc_h labinc_s businc farminc uiinc_h
save fam86_prep.dta, replace

* --- Attach the family variables to every individual of the wave ---
use ind86_prep.dta, clear
merge m:1 famid68 famid using fam86_prep.dta
drop if _merge != 3          // retain matched observations only (author's note: all from using were matched)
drop _merge
generate year = 1986
save ind_fam_86.dta, replace


* 1987

* --- Individual file: 1987 wave variables under wave-independent names ---
use ind2019er.dta, clear
keep ER30001 ER30002 ER30535 ER30536 ER30537 ER30538
* famid68 + persid jointly identify a person across waves;
* famid is the family ID of this wave, seqno the sequence number
rename (ER30001 ER30002 ER30535 ER30536 ER30537  ER30538) ///
       (famid68 persid  famid   seqno   rel_head age)
save ind87_prep.dta, replace

* --- Family file: 1987 wave ---
use fam1987.dta, clear
* College indicator for the head: 1 when V14630 equals 1, 0 otherwise (including missing)
generate college_h = (V14630 == 1)
* UI plus worker's compensation of the head; missing only if both components are missing
egen uiinc_h = rowtotal(V13941 V13942), missing
* ID, head age, head labor/wage income, business and farm income,
* spouse labor income, family composition change, family size
rename (V14090  V13702 V14114 V14671   V13898    V13908 V13907  V13905   V13710      V14113) ///
       (famid68 famid  age_h  labinc_h wageinc_h businc farminc labinc_s comp_change fam_size)
keep famid68 famid age_h labinc_h labinc_s comp_change fam_size businc farminc uiinc_h college_h
save fam87_prep.dta, replace

* --- Attach family variables to each individual; retain matched records only ---
use ind87_prep.dta, clear
merge m:1 famid68 famid using fam87_prep.dta, keep(match) nogenerate
generate year = 1987
save ind_fam_87.dta, replace


* 1988

* --- Individual file: 1988 wave variables under wave-independent names ---
use ind2019er.dta, clear
keep ER30001 ER30002 ER30570 ER30571 ER30572 ER30573
* famid68 + persid jointly identify a person across waves;
* famid is the family ID of this wave, seqno the sequence number
rename (ER30001 ER30002 ER30570 ER30571 ER30572  ER30573) ///
       (famid68 persid  famid   seqno   rel_head age)
save ind88_prep.dta, replace

* --- Family file: 1988 wave ---
use fam1988.dta, clear
* College indicator for the head: 1 when V16104 equals 1, 0 otherwise (including missing)
generate college_h = (V16104 == 1)
* UI plus worker's compensation of the head; missing only if both components are missing
egen uiinc_h = rowtotal(V14956 V14957), missing
* ID, head age, head labor/wage income, business and farm income,
* spouse labor income, family composition change, family size
rename (V15105  V14802 V15130 V16145   V14913    V14923 V14922  V14920   V14810      V15129) ///
       (famid68 famid  age_h  labinc_h wageinc_h businc farminc labinc_s comp_change fam_size)
keep famid68 famid age_h labinc_h labinc_s comp_change fam_size businc farminc uiinc_h college_h
save fam88_prep.dta, replace

* --- Attach family variables to each individual; retain matched records only ---
use ind88_prep.dta, clear
merge m:1 famid68 famid using fam88_prep.dta, keep(match) nogenerate
generate year = 1988
save ind_fam_88.dta, replace


* 1989

* --- Individual file: 1989 wave variables under wave-independent names ---
use ind2019er.dta, clear
keep ER30001 ER30002 ER30606 ER30607 ER30608 ER30609
* famid68 + persid jointly identify a person across waves;
* famid is the family ID of this wave, seqno the sequence number
rename (ER30001 ER30002 ER30606 ER30607 ER30608  ER30609) ///
       (famid68 persid  famid   seqno   rel_head age)
save ind89_prep.dta, replace

* --- Family file: 1989 wave ---
use fam1989.dta, clear
* College indicator for the head: 1 when V17501 equals 1, 0 otherwise (including missing)
generate college_h = (V17501 == 1)
* UI plus worker's compensation of the head; missing only if both components are missing
egen uiinc_h = rowtotal(V16456 V16457), missing
* ID, head age, head labor/wage income, business and farm income,
* spouse labor income, family composition change, family size
rename (V16605  V16302 V16631 V17534   V16413    V16423 V16422  V16420   V16310      V16630) ///
       (famid68 famid  age_h  labinc_h wageinc_h businc farminc labinc_s comp_change fam_size)
keep famid68 famid age_h labinc_h labinc_s comp_change fam_size businc farminc uiinc_h college_h
save fam89_prep.dta, replace

* --- Attach family variables to each individual; retain matched records only ---
use ind89_prep.dta, clear
merge m:1 famid68 famid using fam89_prep.dta, keep(match) nogenerate
generate year = 1989
save ind_fam_89.dta, replace


* 1990

* --- Individual file: 1990 wave variables under wave-independent names ---
use ind2019er.dta, clear
keep ER30001 ER30002 ER30642 ER30643 ER30644 ER30645
* famid68 + persid jointly identify a person across waves;
* famid is the family ID of this wave, seqno the sequence number
rename (ER30001 ER30002 ER30642 ER30643 ER30644  ER30645) ///
       (famid68 persid  famid   seqno   rel_head age)
save ind90_prep.dta, replace

* --- Family file: 1990 wave ---
use fam1990.dta, clear
* College indicator for the head: 1 when V18832 equals 1, 0 otherwise (including missing)
generate college_h = (V18832 == 1)
* UI plus worker's compensation of the head; missing only if both components are missing
egen uiinc_h = rowtotal(V17872 V17873), missing
* ID, head age, head labor/wage income, business and farm income,
* spouse labor income, family composition change, family size
rename (V18021  V17702 V18049 V18878   V17829    V17839 V17838  V17836   V17710      V18048) ///
       (famid68 famid  age_h  labinc_h wageinc_h businc farminc labinc_s comp_change fam_size)
keep famid68 famid age_h labinc_h labinc_s comp_change fam_size businc farminc uiinc_h college_h
save fam90_prep.dta, replace

* --- Attach family variables to each individual; retain matched records only ---
use ind90_prep.dta, clear
merge m:1 famid68 famid using fam90_prep.dta, keep(match) nogenerate
generate year = 1990
save ind_fam_90.dta, replace


* 1991

* --- Individual file: 1991 wave variables under wave-independent names ---
use ind2019er.dta, clear
keep ER30001 ER30002 ER30689 ER30690 ER30691 ER30692
* famid68 + persid jointly identify a person across waves;
* famid is the family ID of this wave, seqno the sequence number
rename (ER30001 ER30002 ER30689 ER30690 ER30691  ER30692) ///
       (famid68 persid  famid   seqno   rel_head age)
save ind91_prep.dta, replace

* --- Family file: 1991 wave ---
use fam1991.dta, clear
* College indicator for the head: 1 when V20132 equals 1, 0 otherwise (including missing)
generate college_h = (V20132 == 1)
* UI plus worker's compensation of the head; missing only if both components are missing
egen uiinc_h = rowtotal(V19172 V19173), missing
* ID, head age, head labor/wage income, business and farm income,
* spouse labor income, family composition change, family size
rename (V19321  V19002 V19349 V20178   V19129    V19139 V19138  V19136   V19010      V19348) ///
       (famid68 famid  age_h  labinc_h wageinc_h businc farminc labinc_s comp_change fam_size)
keep famid68 famid age_h labinc_h labinc_s comp_change fam_size businc farminc uiinc_h college_h
save fam91_prep.dta, replace

* --- Attach family variables to each individual; retain matched records only ---
use ind91_prep.dta, clear
merge m:1 famid68 famid using fam91_prep.dta, keep(match) nogenerate
generate year = 1991
save ind_fam_91.dta, replace


* 1992

* --- Individual file: 1992 wave variables under wave-independent names ---
use ind2019er.dta, clear
keep ER30001 ER30002 ER30733 ER30734 ER30735 ER30736
* famid68 + persid jointly identify a person across waves;
* famid is the family ID of this wave, seqno the sequence number
rename (ER30001 ER30002 ER30733 ER30734 ER30735  ER30736) ///
       (famid68 persid  famid   seqno   rel_head age)
save ind92_prep.dta, replace

* --- Family file: 1992 wave ---
use fam1992.dta, clear
* College indicator for the head: 1 when V21438 equals 1, 0 otherwise (including missing)
generate college_h = (V21438 == 1)
* UI plus worker's compensation of the head; missing only if both components are missing
egen uiinc_h = rowtotal(V20472 V20473), missing
* ID, head age, head labor/wage income, business and farm income,
* spouse labor income, family composition change, family size
rename (V20621  V20302 V20651 V21484   V20429    V20439 V20438  V20436   V20310      V20650) ///
       (famid68 famid  age_h  labinc_h wageinc_h businc farminc labinc_s comp_change fam_size)
keep famid68 famid age_h labinc_h labinc_s comp_change fam_size businc farminc uiinc_h college_h
save fam92_prep.dta, replace

* --- Attach family variables to each individual; retain matched records only ---
use ind92_prep.dta, clear
merge m:1 famid68 famid using fam92_prep.dta, keep(match) nogenerate
generate year = 1992
save ind_fam_92.dta, replace

********************************************************************************
* Combine all Yearly Files
********************************************************************************

* Stack the per-wave individual/family files into one long dataset:
* start from the 1970 wave, then append the waves 1971 through 1992 in order
use "$baseline_path\Datasets\Prepared_Data\ind_fam_70.dta", clear
forvalues num = 71/92 {
	append using "$baseline_path\Datasets\Prepared_Data\ind_fam_`num'.dta"
}

* Identifiers
label variable famid68 "Family ID 1968"
label variable persid "Person ID"
label variable famid "Family ID Current Year"
label variable seqno "Sequence Number"
label variable rel_head "Relationship to Head"

* Income components
label variable labinc_h "Head Labor Income"
label variable labinc_s "Spouse Labor Income"
label variable uiinc_h "UI and worker's com head"
label variable businc "Asset part of business income"
label variable businc_brac "Bracketed asset part of business income"
label variable farminc "Asset part of farm income"
label variable farminc_brac "Bracketed asset part of farm income"

* Demographics and timing
label variable age "Age"
label variable age_h "Head Age"
label variable college_h "Head College"
label variable comp_change "Family Composition Change"
label variable fam_size "Family size"
label variable year "Year"

* Generate unique person identifier from the 1968 family ID and the person number.
* Stored as long rather than the default float: float holds integers exactly only
* up to 16,777,216, so an integer type guarantees the identifier is never rounded
* (rounding would silently merge distinct persons into one panel unit).
gen long id = 1000*famid68 + persid
label var id "Unique Person Identifier"

* Declare panel (person x survey year); required by the F. and D. operators used below
xtset id year

********************************************************************************
* Sample Selection
********************************************************************************

* Restrict to the representative SRC sample (1968 family IDs up to 3000)
keep if famid68 <= 3000

* Restrict to heads of household with sequence number 1:
* the head is coded rel_head == 1 before 1983 and rel_head == 10 from 1983 onwards
keep if seqno == 1 & ((rel_head == 1 & year < 1983) | (rel_head == 10 & year >= 1983))

* Restrict to heads aged 25 to 60 (observations with missing head age are dropped as well)
keep if inrange(age_h, 25, 60)

* Report whether any family ID / year combination occurs more than once
duplicates report famid year

********************************************************************************
* Earnings Variables
********************************************************************************

* Compute earnings
* version_robust selects which income components enter household labor income (yl):
local version_robust = 1
* 1: labor income head, labor income spouse, UI head, asset part of business and farm income
* 2: labor income head, labor income spouse, UI head, asset part of business and farm income (only from 1976)
* 3: labor income head, labor income spouse, UI head
* 4: labor income head, labor income spouse
* 5: labor income head, labor income spouse, asset part of business and farm income

* Fail early on an unsupported choice; otherwise yl would never be created and the
* script would stop further down with an uninformative "variable yl not found"
if !inlist(`version_robust', 1, 2, 3, 4, 5) {
	display as error "version_robust must be 1, 2, 3, 4 or 5 (got `version_robust')"
	exit 198
}

* Versions 1 and 5 use the asset part of business and farm income in all years.
* Up to 1975 these are only available in brackets: codes 0-8 are replaced by the
* dollar values below (presumably bracket midpoints - confirm against the codebook)
* and code 9 is set to missing.
if inlist(`version_robust', 1, 5) {
	local bracket_values "0 250 750 1500 2500 4000 6250 8750 12000"
	foreach inc in businc farminc {
		local code = 0
		foreach value of local bracket_values {
			replace `inc' = `value' if `inc'_brac == `code' & year <= 1975
			local ++code
		}
		replace `inc' = . if `inc'_brac == 9 & year <= 1975
	}
}

* Version 2 avoids the bracketed years by shortening the sample instead.
* NOTE(review): the description above says "only from 1976", but this condition
* also drops 1976 itself - confirm the intended cutoff.
if `version_robust' == 2 {
	drop if year <= 1976
}

* Income components entering household labor earnings for the chosen version
if inlist(`version_robust', 1, 2) {
	local yl_components "labinc_h labinc_s uiinc_h businc farminc"
}
else if `version_robust' == 3 {
	local yl_components "labinc_h labinc_s uiinc_h"
}
else if `version_robust' == 4 {
	local yl_components "labinc_h labinc_s"
}
else {
	* version 5
	local yl_components "labinc_h labinc_s businc farminc"
}

* Household labor earnings: row sum of the components; missing only if all are missing
egen yl = rowtotal(`yl_components'), missing
label var yl "Household labor income"


* Shift to year in which income is earned:
* each observation takes the value recorded in the same person's next-year observation
* (missing when there is no observation for the following year)
replace yl = F.yl

* CPI (2013 = 100)
* Consumer Price Index: Total All Items for the United States; FRED: CPALTT01USA661S
generate cpi = .
label variable cpi "CPI (2013 = 100)"
* Annual index values for 1968, 1969, ..., 1992 (in that order)
local cpi_series 14.93122 15.74682 16.66616 17.3816 17.95037 19.0593 21.16627 23.10154 ///
	24.42868 26.01695 28.0023 31.15381 35.3749 39.03079 41.42394 42.75466 ///
	44.59334 46.17446 47.05087 48.77508 50.764 53.21438 56.08687 58.46213 60.23284
local yr = 1968
foreach level of local cpi_series {
	replace cpi = `level' if year == `yr'
	local ++yr
}

* Real labor income household
* NOTE(review): 98.6 is presumably the index level of the year in whose dollars
* real income is expressed - confirm
generate yl_real = yl*98.6/cpi
label variable yl_real "Real Household Labor Income (earned this year)"

* Earnings threshold: discard observations with real household earnings below 5000
* (observations with missing yl_real are not affected by this condition)
drop if yl_real < 5000
*drop if yl_real < 1500 
*drop if yl_real < 10000

* Log of real household labor income
generate yl_log = log(yl_real)
label variable yl_log "Log of Real Household Labor Income (earned this year)"

* Growth rate: first difference of log earnings within person
generate yl_gr = D.yl_log
label variable yl_gr "Household Labor Income Growth Rate"

* Residual log earnings: remove year, head-age and head-college effects
regress yl_log i.year i.age_h i.college_h
*reg yl_log i.year i.age_h
predict yl_log_resid, residuals
label variable yl_log_resid "Log household labor income taking out year/age/college effect"

* Growth rate of residual log earnings
generate yl_resid_gr = D.yl_log_resid
label variable yl_resid_gr "Household Labor Income Growth Rate taking out year/age/college effect"


********************************************************************************
* Statistics of the Earnings Growth Distribution
********************************************************************************

* Detailed summary statistics of raw and residualized household earnings growth
summarize yl_gr, detail
summarize yl_resid_gr, detail

* Head/spouse composition change indicators:
* change_hs is 0 when comp_change <= 1 and 1 when comp_change > 1 (missing stays missing);
* change_hs_2 is the same indicator taken from the person's next-year observation
generate change_hs = (comp_change > 1) if comp_change != .
generate change_hs_2 = F.change_hs
label variable change_hs "Family composition change head/spouse from t-1 to t (1=yes)"
label variable change_hs_2 "Family composition change head/spouse from t to t+1 (1=yes)"

* Raw growth, restricted to no head/spouse composition change (disabled)
*sum yl_gr if change_hs == 0 & change_hs_2 == 0, d
* Residualized growth, restricted to no head/spouse composition change in either period
summarize yl_resid_gr if change_hs == 0 & change_hs_2 == 0, detail

